## This is the replication R code for Cao 2010: Networks As Channels of Policy Diffusion: Explaining Worldwide Changes in Capital Taxation, 1998-2006. 
## the data needed to run the following code are posted in the author's website: privatewww.essex.ac.uk/~caox
## there, you will find detailed instruction there on how to download the data and run the replication code. 

dd <- c("C:/Users/Zach/Desktop/R Files") 
setwd(dd)
library(xtable)
library(stargazer)
library(cem)
library(MatchIt)
library(plm)
library(Amelia)
library(Zelig)
library(MatchingFrontier)
library(car)
library(lmtest)
library(DataCombine)



####
# DVs: tax on corporate
Tax_corp<-dget("DV/Tax_corp_wdi9806")

##### now, let's model:
dv.tax.c<-year<-cty<-NULL
for (j in 1:dim(Tax_corp)[2]){dv.tax.c<-c(dv.tax.c, Tax_corp[, j])
                              year<-c(year, rep(c(1998:2006)[j], dim(Tax_corp)[1]))
                              cty<-c(cty, rownames(Tax_corp))
}

Tax_corp_lg<-Tax_corp
Tax_corp_lg[,4]<-Tax_corp_lg[,3]
Tax_corp_lg[,8]<-Tax_corp_lg[,7]

dv.tax.c.lg<-rep(NA, dim(Tax_corp_lg)[1])
for (j in 1:(dim(Tax_corp_lg)[2]-1)){dv.tax.c.lg<-c(dv.tax.c.lg, Tax_corp_lg[, j])}


# Add in nodal characteristics:
# inflation rate
# GDP growth
# umemployment rate
# age dependency
# financial openness (legal)
# trade openness
# public debt (missing values)


## Inflation rate
Infl<-dget("Node/weoINFL8005"); Infl[, dim(Infl)[2]]<-as.numeric(Infl[, dim(Infl)[2]])
infl<-NULL
for (i in 1:length(dv.tax.c)){if (cty[i] %in% rownames(Infl) & as.character(year[i]) %in% colnames(Infl)){infl<-c(infl, Infl[cty[i], as.character(year[i])])}
                              else {infl<-c(infl, NA)}}
infl.lg<-NULL
for (i in 1:length(dv.tax.c)){if (cty[i] %in% rownames(Infl) & as.character(year[i]-1) %in% colnames(Infl)){infl.lg<-c(infl.lg, Infl[cty[i], as.character(year[i]-1)])}
                              else {infl.lg<-c(infl.lg, NA)}
}

## DGP growth
Grow<-dget("Node/WDI_GDPgrow6105")
Temp<-Grow
iv<-NULL
for (i in 1:length(dv.tax.c)){if (cty[i] %in% rownames(Temp) & as.character(year[i]) %in% colnames(Temp)){iv<-c(iv, Temp[cty[i], as.character(year[i])])}
                              else {iv<-c(iv, NA)}}
iv.lg<-NULL
for (i in 1:length(dv.tax.c)){if (cty[i] %in% rownames(Temp) & as.character(year[i]-1) %in% colnames(Temp)){iv.lg<-c(iv.lg, Temp[cty[i], as.character(year[i]-1)])}
                              else {iv.lg<-c(iv.lg, NA)}
}
grow<-iv; grow.lg<-iv.lg


# GDP per capita
GDDPPC<-dget("Node/weoGDPPC8005")
Temp<-GDDPPC
iv<-NULL
for (i in 1:length(dv.tax.c)){if (cty[i] %in% rownames(Temp) & as.character(year[i]) %in% colnames(Temp)){iv<-c(iv, Temp[cty[i], as.character(year[i])])}
                              else {iv<-c(iv, NA)}}
iv.lg<-NULL
for (i in 1:length(dv.tax.c)){if (cty[i] %in% rownames(Temp) & as.character(year[i]-1) %in% colnames(Temp)){iv.lg<-c(iv.lg, Temp[cty[i], as.character(year[i]-1)])}
                              else {iv.lg<-c(iv.lg, NA)}
}
gdppc<-as.numeric(iv); gdppc.lg<-as.numeric(iv.lg)
gdppc<-gdppc/1000; gdppc.lg<-gdppc.lg/1000

## Unemployment rate
Unemp<-dget("Node/un_wdi9006") # there are lots of missing values here.
Temp<-Unemp
iv<-NULL
for (i in 1:length(dv.tax.c)){if (cty[i] %in% rownames(Temp) & as.character(year[i]) %in% colnames(Temp)){iv<-c(iv, Temp[cty[i], as.character(year[i])])}
                              else {iv<-c(iv, NA)}}
iv.lg<-NULL
for (i in 1:length(dv.tax.c)){if (cty[i] %in% rownames(Temp) & as.character(year[i]-1) %in% colnames(Temp)){iv.lg<-c(iv.lg, Temp[cty[i], as.character(year[i]-1)])}
                              else {iv.lg<-c(iv.lg, NA)}
}
unemp<-iv; unemp.lg<-iv.lg


## Net government debt
Debt<-dget("Node/weoNDEBT8005") # there are lots of missing values here.
Temp<-Debt
iv<-NULL
for (i in 1:length(dv.tax.c)){if (cty[i] %in% rownames(Temp) & as.character(year[i]) %in% colnames(Temp)){iv<-c(iv, Temp[cty[i], as.character(year[i])])}
                              else {iv<-c(iv, NA)}}
iv.lg<-NULL
for (i in 1:length(dv.tax.c)){if (cty[i] %in% rownames(Temp) & as.character(year[i]-1) %in% colnames(Temp)){iv.lg<-c(iv.lg, Temp[cty[i], as.character(year[i]-1)])}
                              else {iv.lg<-c(iv.lg, NA)}
}
debt<-iv; debt.lg<-iv.lg


## Age dependency
DP65<-dget("Node/d65_wdi9006")
Temp<-DP65
iv<-NULL
for (i in 1:length(dv.tax.c)){if (cty[i] %in% rownames(Temp) & as.character(year[i]) %in% colnames(Temp)){iv<-c(iv, Temp[cty[i], as.character(year[i])])}
                              else {iv<-c(iv, NA)}}
iv.lg<-NULL
for (i in 1:length(dv.tax.c)){if (cty[i] %in% rownames(Temp) & as.character(year[i]-1) %in% colnames(Temp)){iv.lg<-c(iv.lg, Temp[cty[i], as.character(year[i]-1)])}
                              else {iv.lg<-c(iv.lg, NA)}
}
dp65<-iv; dp65.lg<-iv.lg

DP14<-dget("Node/d14_wdi9006")
Temp<-DP14
iv<-NULL
for (i in 1:length(dv.tax.c)){if (cty[i] %in% rownames(Temp) & as.character(year[i]) %in% colnames(Temp)){iv<-c(iv, Temp[cty[i], as.character(year[i])])}
                              else {iv<-c(iv, NA)}}
iv.lg<-NULL
for (i in 1:length(dv.tax.c)){if (cty[i] %in% rownames(Temp) & as.character(year[i]-1) %in% colnames(Temp)){iv.lg<-c(iv.lg, Temp[cty[i], as.character(year[i]-1)])}
                              else {iv.lg<-c(iv.lg, NA)}
}
dp14<-iv; dp14.lg<-iv.lg

DP=dp14+dp65

## Financial Openness:
CapOpen<-dget("Node/CapOpen_extd7005")
Temp<-CapOpen
iv<-NULL
for (i in 1:length(dv.tax.c)){if (cty[i] %in% rownames(Temp) & as.character(year[i]) %in% colnames(Temp)){iv<-c(iv, Temp[cty[i], as.character(year[i])])}
                              else {iv<-c(iv, NA)}}
iv.lg<-NULL
for (i in 1:length(dv.tax.c)){if (cty[i] %in% rownames(Temp) & as.character(year[i]-1) %in% colnames(Temp)){iv.lg<-c(iv.lg, Temp[cty[i], as.character(year[i]-1)])}
                              else {iv.lg<-c(iv.lg, NA)}
}
capopen<-iv; capopen.lg<-iv.lg


## Trade Openness:
TradeOpen<-dget("Node/DOT_WEO_tradeopen9005")
Temp<-TradeOpen
iv<-NULL
for (i in 1:length(dv.tax.c)){if (cty[i] %in% rownames(Temp) & as.character(year[i]) %in% colnames(Temp)){iv<-c(iv, Temp[cty[i], as.character(year[i])])}
                              else {iv<-c(iv, NA)}}
iv.lg<-NULL
for (i in 1:length(dv.tax.c)){if (cty[i] %in% rownames(Temp) & as.character(year[i]-1) %in% colnames(Temp)){iv.lg<-c(iv.lg, Temp[cty[i], as.character(year[i]-1)])}
                              else {iv.lg<-c(iv.lg, NA)}
}
tradeopen<-iv; tradeopen.lg<-iv.lg


######### Add in Geography and Common Language ##########
# 2.  smallest distance, 
mdd<-read.csv("W/distance/smallmdd.csv", as.is=T)
mdd$ida[mdd$ida=="GFR"]<-"GMY"; mdd$idb[mdd$idb=="GFR"]<-"GMY"
mdd$ida[mdd$ida=="YUG"]<-"SER"; mdd$idb[mdd$idb=="YUG"]<-"SER"

mdd98 <- na.omit(subset(mdd, mdd$year>=1998)) #no NAs - BUT NO NAs ANYWAY

mdd98 <- subset(mdd, mdd$year>=1998)

wmdd<-matrix(NA, ncol=length(rownames(Tax_corp)),nrow=length(rownames(Tax_corp)))
rownames(wmdd)<-colnames(wmdd)<-rownames(Tax_corp)

for (i in 1:dim(mdd98)[1]) {if (mdd98$ida[i] %in% rownames(Tax_corp) & mdd98$idb[i] %in% rownames(Tax_corp))
{wmdd[mdd98$ida[i],mdd98$idb[i]] <- mdd98$mindist[i]}}

# notice that there are large amounts of NA: could be missing values,
#                                            could be the distance between two countries's closest points of their outer 
#                                            boundaries are bigger than 950 km. 

# strict contiguity: share land border
thres<-950
w<-wmdd
w[w>thres]<-NA
w[w<=thres]<-1; 
w[is.na(w)]<-0

geomatch<-rowSums(w)
geomatch=rep(geomatch,9)
sm<-rowSums(w)
sm[sm==0]<-1

w<-w/sm

Neib.mdd<-rep(NA, dim(w)[1])
year.lag<-c(1998, 1999, 2000, 2000, 2002, 2003, 2004, 2004) 

Tax<-Tax_corp
Tax[is.na(Tax)]<-0
for (k in 1:length(year.lag)){
  neib.mdd<-w%*%Tax[, as.character(year.lag[k])] 
  Neib.mdd<-c(Neib.mdd, neib.mdd)
}

## Common language:
w_lng<-dget("W/culture/w_clang")

wl<-matrix(0, ncol=length(rownames(Tax_corp)), nrow=length(rownames(Tax_corp)))
colnames(wl)<-rownames(wl)<-rownames(Tax_corp)

for (i in 1:dim(w_lng)[1])      {
  for (j in (1:dim(w_lng)[2])[-i]){if (rownames(w_lng)[i] %in% rownames(Tax_corp)  & colnames(w_lng)[j] %in% rownames(Tax_corp))
  {wl[rownames(w_lng)[i], colnames(w_lng)[j]]<-w_lng[i, j]}
  }}
rs<-rowSums(wl, na.rm=T)
rs[rs==0]<-1

wl<-wl/rs # notice this is row-standardize which results in non-symmetric version:

Neib.lng<-rep(NA, dim(wl)[1])# the first year's temporarily lagged spatial lag is NA

year.lag<-c(1998, 1999, 2000, 2000, 2002, 2003, 2004, 2004) 


for (k in 1:length(year.lag)){
  neib.lng<-wl%*%Tax[, as.character(year.lag[k])] 
  Neib.lng<-c(Neib.lng, neib.lng)
}




#### Move to Networks: portfolio, FDI, trade, IGO ####

# 1. portfolio:
p2001<-dget("W/portfolio/cor.portf2001")
p2002<-dget("W/portfolio/cor.portf2002")
p2003<-dget("W/portfolio/cor.portf2003")
p2004<-dget("W/portfolio/cor.portf2004")
p2005<-dget("W/portfolio/cor.portf2005")
Portf<-list(p2001, p2002, p2003, p2004, p2005)
year.lag<-c(2000, 2002, 2003, 2004, 2004)

Cn<-NULL
portmatch=NULL
Neib.portf<-rep(NA, 4*dim(Tax_corp)[1]) # we don't have data for 1998-2001: notice spatial lags are temporarily lagged also.
for (k in 1:5){se<-Portf[[k]]
               
               wp<-matrix(NA, ncol=length(rownames(Tax_corp)), nrow=length(rownames(Tax_corp)))
               colnames(wp)<-rownames(wp)<-rownames(Tax_corp)
               
               for (i in 1:dim(se)[1])      {
                 for (j in (1:dim(se)[2])[-i]){if (rownames(se)[i] %in% rownames(Tax_corp)  & colnames(se)[j] %in% rownames(Tax_corp))
                 {wp[rownames(se)[i], colnames(se)[j]]<-se[i, j]}
                 }}
               
               diag(wp)<-0
               #se<-se+1
               wp[wp<=0.5]<-0 # this is to use a cutpoint in correlations to specify close neighbors.
               wp[is.na(wp)]<-0
               sm<-rowSums(wp);
               portmatch=c(portmatch,sm)
               sm[sm==0]<-1
               wp<-wp/sm 
               
               counts<-wp
               counts[counts>0]<-1
               Cn<-c(Cn, colSums(counts))
               
               n.se<-wp%*%Tax[, as.character(year.lag)[k]]
               Neib.portf<-c(Neib.portf, n.se)
}

### 2. let's take a look at FDI:
fdi.f1998<-dget("W/fdi/fdi_oecd_un_f_1998")
fdi.f1999<-dget("W/fdi/fdi_oecd_un_f_1999")
fdi.f2000<-dget("W/fdi/fdi_oecd_un_f_2000")
fdi.f2001<-dget("W/fdi/fdi_oecd_un_f_2001")
fdi.f2002<-dget("W/fdi/fdi_oecd_un_f_2002")

fdi.s1998<-dget("W/fdi/fdi_oecd_un_1998")
fdi.s1999<-dget("W/fdi/fdi_oecd_un_1999")
fdi.s2000<-dget("W/fdi/fdi_oecd_un_2000")
fdi.s2001<-dget("W/fdi/fdi_oecd_un_2001")
fdi.s2002<-dget("W/fdi/fdi_oecd_un_2002")


c.f.fdi.1998<-cor(fdi.f1998, use="pairwise.complete.obs") 
c.f.fdi.1999<-cor(fdi.f1999, use="pairwise.complete.obs") 
c.f.fdi.2000<-cor(fdi.f2000, use="pairwise.complete.obs") 
c.f.fdi.2001<-cor(fdi.f2001, use="pairwise.complete.obs") 
c.f.fdi.2002<-cor(fdi.f2002, use="pairwise.complete.obs") 

c.s.fdi.1998<-cor(fdi.s1998, use="pairwise.complete.obs") 
c.s.fdi.1999<-cor(fdi.s1999, use="pairwise.complete.obs") 
c.s.fdi.2000<-cor(fdi.s2000, use="pairwise.complete.obs") 
c.s.fdi.2001<-cor(fdi.s2001, use="pairwise.complete.obs") 
c.s.fdi.2002<-cor(fdi.s2002, use="pairwise.complete.obs") 

FDI<-list(c.f.fdi.1998, 
          c.f.fdi.1999,
          c.f.fdi.2000, 
          c.f.fdi.2001, 
          c.f.fdi.2002)
year.lag<-c(1998, 1999, 2000, 2000, 2002) 


# what about we use k-nearest neighborhood
  fdimatch=rep(NA, 1*dim(Tax_corp)[1])
  Neib.fdi<-rep(NA, 1*dim(Tax_corp)[1]) # we don't have data for 1998's lag, neither for year 2004-2006
  k<-21
  for (i in 1:5){se<-FDI[[i]]; se[is.na(se)]<-0; diag(se)<-0
                 se.ctry<-intersect(rownames(Tax_corp), colnames(se))
                 se<-se[se.ctry, se.ctry]
                 for (j in 1:dim(se)[1]){se[j,][se[j,]<sort(se[j, ], decreasing = T)[k]]<-0} #nearest neigh
                 rs=as.matrix(rowSums(se))
                 se<-se/rowSums(se) 
                 n.se<-se%*%Tax[se.ctry, as.character(year.lag)[i]]
                 nse<-NULL
                 rse=NULL
                 for (j in 1:dim(Tax_corp)[1]){if (rownames(Tax_corp)[j] %in% rownames(n.se)){nse<-c(nse, n.se[rownames(Tax_corp)[j],])}
                                               else {nse<-c(nse, NA)}}
                 for (j in 1:dim(Tax_corp)[1]){if (rownames(Tax_corp)[j] %in% rownames(rs)){rse<-c(rse, rs[rownames(Tax_corp)[j],])}
                                               else {rse<-c(rse, NA)}}
                 fdimatch=c(fdimatch,rse)
                 Neib.fdi<-c(Neib.fdi, nse)
  }
  Neib.fdi<-c(Neib.fdi, rep(NA, 3*dim(Tax_corp)[1])) # we don't have data for 2004-2006: notice spatial lags are temporarily lagged also.
  fdimatch = c(fdimatch,rep(NA, 3*dim(Tax_corp)[1]))
  fit.fdi<-lm(dv.tax.c~dv.tax.c.lg
              +as.factor(cty)
              +as.factor(year)
              +infl
              +grow
              +unemp
              +I(dp65+dp14)
              +gdppc
              +capopen
              +tradeopen
              +Neib.mdd
              # +Neib.lng
              # +Neib.portf
              +Neib.fdi)
  
summary(fit.fdi)


### 3. what about structural equivalence in trade:
## let's use the comtrade data: 

seexp<-dget("W/seexp/expsq_acro98")

seexp.1998<-dget("W/seexp/cor_sitc_stack219_1998")#[colnames(seexp)[-30], colnames(seexp)[-30]] # get rid of "KAK"
seexp.1999<-dget("W/seexp/cor_sitc_stack219_1999")#[colnames(seexp)[-30], colnames(seexp)[-30]]
seexp.2000<-dget("W/seexp/cor_sitc_stack219_2000")#[colnames(seexp)[-30], colnames(seexp)[-30]]
seexp.2001<-dget("W/seexp/cor_sitc_stack219_2001")#[colnames(seexp)[-30], colnames(seexp)[-30]]
seexp.2002<-dget("W/seexp/cor_sitc_stack219_2002")#[colnames(seexp)[-30], colnames(seexp)[-30]]
seexp.2003<-dget("W/seexp/cor_sitc_stack219_2003")#[colnames(seexp)[-30], colnames(seexp)[-30]]
seexp.2004<-dget("W/seexp/cor_sitc_stack219_2004")#[colnames(seexp)[-30], colnames(seexp)[-30]]
seexp.2005<-dget("W/seexp/cor_sitc_stack219_2005")#[colnames(seexp)[-30], colnames(seexp)[-30]]
SE<-list(seexp.1998, seexp.1999, seexp.2000, seexp.2001, seexp.2002, seexp.2003, seexp.2004, seexp.2005)


year.lag<-c(1998, 1999, 2000, 2000, 2002:2004, 2004) ### there are little data for 2001 and 2005, 
### so we have to use the data from the previous year. 

k<-21
Neib.seexp<-rep(NA, 1*dim(Tax_corp)[1])                      # we don't have data for 1998's lag
seexpmatch=rep(NA, 1*dim(Tax_corp)[1])
for (i in 1:8){se<-SE[[i]]; se[is.na(se)]<-0; diag(se)<-0
               se.ctry<-intersect(rownames(Tax_corp), colnames(se))
               se<-se[se.ctry, se.ctry]
               for (j in 1:dim(se)[1]){se[j,][se[j,]<sort(se[j, ], decreasing = T)[k]]<-0}
               sm<-rowSums(se)
               rs=as.matrix(rowSums(se))
               sm[sm==0]<-1
               se<-se/sm 
               n.se<-se%*%Tax[se.ctry, as.character(year.lag)[i]]
               nse<-NULL
               rse=NULL
               for (j in 1:dim(Tax_corp)[1]){if (rownames(Tax_corp)[j] %in% rownames(rs)){rse<-c(rse, rs[rownames(Tax_corp)[j],])}
                                             else {rse<-c(rse, NA)}}
               for (j in 1:dim(Tax_corp)[1]){if (rownames(Tax_corp)[j] %in% rownames(n.se)){nse<-c(nse, n.se[rownames(Tax_corp)[j],])}
                                             else {nse<-c(nse, NA)}}
               Neib.seexp<-c(Neib.seexp, nse)
               seexpmatch=c(seexpmatch,rse)
               }
# 
#Neib.seexp[Neib.seexp==0]<-NA # this is the trick here to have better fit for this.

fit.seexp<-lm(dv.tax.c~dv.tax.c.lg
              +as.factor(cty)
              +as.factor(year)
              +infl
              +grow
              +unemp
              +I(dp65+dp14)
              +gdppc
              +capopen
              +tradeopen
              +Neib.mdd
              # +Neib.lng
              # +Neib.fdi
              +Neib.seexp
)
summary(fit.seexp)



# 4. trade connections: no directionality here:
EX<-dget("W/trade/DOTX8005") 
year.ex<-c(1998:2005)
year.lag<-c(1998:2000, 2000, 2002:2004, 2004)

K=21
Neib.td<-rep(NA, 1*dim(Tax_corp)[1]) # we don't have data for 1998's lag
tradematch = rep(NA, 1*dim(Tax_corp)[1])
for (k in 1:8){
  tr<-EX[[as.character(year.ex[k])]]
  chs<-intersect(rownames(Tax_corp), rownames(tr))
  tr<-tr[chs, chs]+t(tr[chs, chs]) # this is total trade and this is symmetric.
  
  
  for (m in 1:dim(tr)[1]){tr[m,][tr[m,]<sort(as.numeric(tr[m, ]), decreasing = T)[K]]<-0}
  
  w[is.na(w)]<-0
  rs=tr
  rs[is.na(rs)]=0
  rs=as.matrix(rowSums(rs))
    w_td<-tr/rowSums(tr, na.rm=T); 
  w_td[is.na(w_td)]<-0 # need to do a row-standardization for export data:
  
  w<-matrix(0, ncol=length(rownames(Tax_corp)), nrow=length(rownames(Tax_corp)))
  colnames(w)<-rownames(w)<-rownames(Tax_corp)
  rse=as.matrix(rep(0,133))
  rownames(rse)=rownames(w)
  for (i in 1:dim(w_td)[1])      {
    for (j in (1:dim(w_td)[2])[-i]){if (rownames(w_td)[i] %in% rownames(Tax_corp) & colnames(w_td)[j] %in% rownames(Tax_corp)) 
    {w[rownames(w_td)[i], colnames(w_td)[j]]<-w_td[i, j]}
    else {}
    }}
  for (i in 1:length(rs)) {if (rownames(rs)[i] %in% rownames(w)) 
  {rse[rownames(rs)[i],1]<-rs[i]}
    else {}}
  tradematch=c(tradematch,rse)
  neib.td<-w%*%Tax[, as.character(year.lag[k])] 
  Neib.td<-c(Neib.td, neib.td)
}


fit.td<-lm(dv.tax.c~dv.tax.c.lg
           +as.factor(cty)
           +as.factor(year)
           +infl
           +grow
           +unemp
           +I(dp65+dp14)
           +gdppc
           +capopen
           +tradeopen
           +Neib.mdd
           # +Neib.lng
           #+Neib.portf
           #+Neib.fdi
           #+Neib.seexp
           +Neib.td
)
summary(fit.td)


## 5. IGO networks:
igo.em<-dget("W/igo/Matrix.igo.em.8900")
igo.sm<-dget("W/igo/Matrix.igo.8900")

igo1998<-dget("W/igo/igo1998")
igo1999<-dget("W/igo/igo1999")
igo2000<-dget("W/igo/igo2000")
IGO<-list(igo1998, igo1999, igo2000)
IGO.sm<-list(igo.sm[["1998"]], igo.sm[["1999"]], igo.sm[["2000"]])
IGO.em<-list(igo.em[["1998"]], igo.em[["1999"]], igo.em[["2000"]])

year.lag<-c(1998:2000)


Neib.igo<-rep(NA, 1*dim(Tax_corp)[1]) # we don't have data for 1998's lag
igomatch<-rep(NA, 1*dim(Tax_corp)[1])
K=21
for (k in 1:3){
  igo<-IGO[[k]]
  chs<-intersect(rownames(Tax_corp), rownames(igo))
  
  for (m in 1:dim(igo)[1]){igo[m,][igo[m,]<sort(as.numeric(igo[m, ]), decreasing = T)[K]]<-0}
  
  rs=as.matrix(rowSums(igo[chs,chs]))
  w_igo<-igo[chs, chs]/rowSums(igo[chs, chs], na.rm=T); 
  w_igo[is.na(w_igo)]<-0 # need to do a row-standardization for export data:
  
  w<-matrix(0, ncol=length(rownames(Tax_corp)), nrow=length(rownames(Tax_corp)))
  colnames(w)<-rownames(w)<-rownames(Tax_corp)
  
  rse=as.matrix(rep(0,133))
  rownames(rse)=rownames(w)
    
  for (i in 1:dim(w_igo)[1])      {
    for (j in (1:dim(w_igo)[2])[-i]){if (rownames(w_igo)[i] %in% rownames(Tax_corp) & colnames(w_igo)[j] %in% rownames(Tax_corp)) 
    {w[rownames(w_igo)[i], colnames(w_igo)[j]]<-w_igo[i, j]}
    else {}
    }}
  for (i in 1:length(rs)) {if (rownames(rs)[i] %in% rownames(w)) 
  {rse[rownames(rs)[i],1]<-rs[i]}
  else {}}
  igomatch=c(igomatch,rse)
  
  neib.igo<-w%*%Tax[, as.character(year.lag[k])] 
  
  Neib.igo<-c(Neib.igo, neib.igo)
}

Neib.igo<-c(Neib.igo, rep(NA, 5*dim(Tax_corp)[1])) # we don't have data for 1998's lag
igomatch=c(igomatch, rep(NA, 5*dim(Tax_corp)[1])) # we don't have data for 1998's lag


fit.igo<-lm(dv.tax.c~dv.tax.c.lg
            #+as.factor(cty)
            +as.factor(year)
            +infl
            +grow
            +unemp
            +I(dp65+dp14)
            +gdppc
            +capopen
            +tradeopen
            +Neib.mdd
            +Neib.igo
            , na.action=na.omit)
summary(fit.igo)





Neib.igo.sm<-rep(NA, 1*dim(Tax_corp)[1]) # we don't have data for 1998's lag
igo.smmatch=rep(NA, 1*dim(Tax_corp)[1]) # we don't have data for 1998's lag
K=21
for (k in 1:3){
  igo<-IGO.sm[[k]]
  chs<-intersect(rownames(Tax_corp), rownames(igo))
  
  for (m in 1:dim(igo)[1]){igo[m,][igo[m,]<sort(as.numeric(igo[m, ]), decreasing = T)[K]]<-0}
  
  rs=as.matrix(rowSums(igo[chs,chs]))
  w_igo<-igo[chs, chs]/rowSums(igo[chs, chs], na.rm=T); 
  w_igo[is.na(w_igo)]<-0 # need to do a row-standardization for export data:
  
  w<-matrix(0, ncol=length(rownames(Tax_corp)), nrow=length(rownames(Tax_corp)))
  colnames(w)<-rownames(w)<-rownames(Tax_corp)
  
  rse=as.matrix(rep(0,133))
  rownames(rse)=rownames(w)
    
  for (i in 1:dim(w_igo)[1])      {
    for (j in (1:dim(w_igo)[2])[-i]){if (rownames(w_igo)[i] %in% rownames(Tax_corp) & colnames(w_igo)[j] %in% rownames(Tax_corp)) 
    {w[rownames(w_igo)[i], colnames(w_igo)[j]]<-w_igo[i, j]}
    else {}
    }}
  for (i in 1:length(rs)) {if (rownames(rs)[i] %in% rownames(w)) 
  {rse[rownames(rs)[i],1]<-rs[i]}
  else {}}
  igo.smmatch=c(igo.smmatch,rse)
  
  neib.igo<-w%*%Tax[, as.character(year.lag[k])] 
  
  Neib.igo.sm<-c(Neib.igo.sm, neib.igo)
}

Neib.igo.sm<-c(Neib.igo.sm, rep(NA, 5*dim(Tax_corp)[1])) # we don't have data for 1998's lag
igo.smmatch<-c(igo.smmatch, rep(NA, 5*dim(Tax_corp)[1])) # we don't have data for 1998's lag

fit.igo<-lm(dv.tax.c~dv.tax.c.lg
            #+as.factor(cty)
            +as.factor(year)
            +infl
            +grow
            +unemp
            +I(dp65+dp14)
            +gdppc
            +capopen
            +tradeopen
            +Neib.mdd
            #+Neib.lng
            # +Neib.portf
            # +Neib.fdi
            #+Neib.seexp
            +Neib.igo.sm
            #+Neib.td
            , na.action=na.omit)
summary(fit.igo)


Neib.igo.em<-rep(NA, 1*dim(Tax_corp)[1]) # we don't have data for 1998's lag
igo.emmatch=rep(NA, 1*dim(Tax_corp)[1]) # we don't have data for 1998's lag
K=21
for (k in 1:3){
  igo<-IGO.em[[k]]
  chs<-intersect(rownames(Tax_corp), rownames(igo))
  
  for (m in 1:dim(igo)[1]){igo[m,][igo[m,]<sort(as.numeric(igo[m, ]), decreasing = T)[K]]<-0}
  
  rs=as.matrix(rowSums(igo[chs,chs]))
  w_igo<-igo[chs, chs]/rowSums(igo[chs, chs], na.rm=T); 
  w_igo[is.na(w_igo)]<-0 # need to do a row-standardization for export data:
  
  w<-matrix(0, ncol=length(rownames(Tax_corp)), nrow=length(rownames(Tax_corp)))
  colnames(w)<-rownames(w)<-rownames(Tax_corp)
  
  rse=as.matrix(rep(0,133))
  rownames(rse)=rownames(w)
  
  for (i in 1:dim(w_igo)[1])      {
    for (j in (1:dim(w_igo)[2])[-i]){if (rownames(w_igo)[i] %in% rownames(Tax_corp) & colnames(w_igo)[j] %in% rownames(Tax_corp)) 
    {w[rownames(w_igo)[i], colnames(w_igo)[j]]<-w_igo[i, j]}
    else {}
    }}
  for (i in 1:length(rs)) {if (rownames(rs)[i] %in% rownames(w)) 
  {rse[rownames(rs)[i],1]<-rs[i]}
  else {}}
  igo.emmatch=c(igo.emmatch,rse)
  
  neib.igo<-w%*%Tax[, as.character(year.lag[k])] 
  
  Neib.igo.em<-c(Neib.igo.em, neib.igo)
}

Neib.igo.em<-c(Neib.igo.em, rep(NA, 5*dim(Tax_corp)[1])) # we don't have data for 1998's lag
igo.emmatch=c(igo.emmatch, rep(NA, 5*dim(Tax_corp)[1]))

fit.igo<-lm(dv.tax.c~dv.tax.c.lg
            #+as.factor(cty)
            +as.factor(year)
            +infl
            +grow
            +unemp
            +I(dp65+dp14)
            +gdppc
            +capopen
            +tradeopen
            +Neib.mdd
            +Neib.igo.em
            , na.action=na.omit)
summary(fit.igo)

#imputations and simulated regressions

#############################################################################
## Replication Script: Uji, Park, and Perez
## Final Project for Gov 2001
## May 3, 2015
#############################################################################


getwd()
setwd("E:/Gov2001replication/Replication")
list.files() 




#############################################################################
## read in workspace
#############################################################################
load("imputations.RData")
load("zachData.RData")

load("imputationsFinal.RData")

install.packages("Amelia")
library(Amelia)

install.packages("plm")
library(plm)

install.packages("ggplot2")
library(ggplot2)


## Gov 2001: Replication Script 2
## Imputation using Amelia
## May 1, 2015


getwd()
setwd("E:/Gov2001replication/Replication")
list.files() 




#############################################################################
## read in workspace
#############################################################################
load("zachData.RData")
load("imputationsFinal.RData")
load("imputations.RData")
load("imputedPanelReady.RData")
load("alternativeImputations.RData")
load("completeData.RData")

names(allNeib.newdata.lag.tax)
allNeib.newdata <- allNeib.newdata.lag.tax[,-c(4,12:20)]
names(allNeib.newdata)
allNeib.newdata

install.packages("Amelia")
library(Amelia)

## For regression
install.packages("DataCombine")
library(DataCombine)
View(allNeib.newdata)

allNeib.newdata <- merge(allNeib.newdata, datafoo, by=c("country","year"), all=TRUE)
names(allNeib.newdata)
View(allNeib.newdata)

allNeib.newdata.lag.tax <- slide(allNeib.newdata, Var = "dv.tax.c", GroupVar = "country", slideBy = -1)
allNeib.newdata.lag.tax$dv.tax.c.lg <- allNeib.newdata.lag.tax$dv.tax.c-1
allNeib.newdata.lag.tax <- allNeib.newdata.lag.tax[,-20]
allNeib.newdata.lag.tax <- allNeib.newdata.lag.tax[c(1:3,20,4:19)]
names(allNeib.newdata.lag.tax)

save(allNeib.newdata, allNeib.newdata.lag.tax, file = "completeData.RData")





##########################################################################
## Describe data
##########################################################################


cao_missingMap <- missmap(a.alternative)
names(allNeib.newdata.portf.out)

## pre-imputation transformations
hist(allNeib.newdata.portf.out$log.infl.9)
hist(allNeib.newdata.portf.out$log.unemp)
hist(allNeib.newdata.portf.out$log.gdppc)
hist(allNeib.newdata.portf.out$log.tradeopen)
qplot(infl, geom="histogram") 

qplot(infl,
      geom="histogram",
      binwidth = 1,  
      main = "Histogram for Inflation", 
      xlab = "Inflation",  
      fill=I("blue"), 
      col=I("red"), 
      alpha=I(.2),
      xlim=c(20,50))

density(infl)



plot( p1, col=rgb(0,0,1,1/4), xlim=c(0,10))  # first histogram
plot( p2, col=rgb(1,0,0,1/4), xlim=c(0,10), add=T)  # second







#############################################################################
## create final panel data set
#############################################################################

## Zach's transformation of Cao's variables into a dataframe
DP <- dp14+dp65
dataimp <- data.frame(as.character(cty), year, dv.tax.c,dv.tax.c.lg, 
                      log(infl+9),
                      grow, log(unemp), DP, log(gdppc),
                      capopen, log(tradeopen),
                      Neib.mdd) 
colnames(dataimp)[1] <- "country"
names(dataimp)
View(dataimp)

## missing Neib.fdi, Neib.igo, Neib.lng, Neib.portf,
# Neib.seexp, Neib.td
# fdi, portf, and seexp are 'named vectors'

NeibPanels <- caoPanel[,c(1,2,15,16,18:22)]
names(caoPanel)
names(NeibPanels)


## Convert to panel data format, and exclude lagged tax var.
newdata <- dataimp[order(dataimp$country),]
names(newdata)
colnames(newdata)[5] <- "log.infl.9"
colnames(newdata)[7] <- "log.unemp"
colnames(newdata)[9] <- "log.gdppc"
colnames(newdata)[11] <- "log.tradeopen"
head(newdata)
newdata <- newdata[,-4]


## FINAL DATA: - tax lag, - Neib.portf ----> allNeib.newdata.portf.out
names(allNeib.newdata)

allNeib.newdata.portf.out <- allNeib.newdata[,-14]
names(allNeib.newdata.portf.out)


save(allNeib.newdata, allNeib.newdata.lag.tax, allNeib.newdata.portf.out, file = "completeData.RData")


#   ## What is causing perfect collinearity?
#   foo1 <- lm(dv.tax.c ~ country + year + grow, data = allNeib.newdata)
#   foo2 <- lm(dv.tax.c ~ country + year + grow + log.infl.9, data = allNeib.newdata)
#   foo3 <- lm(dv.tax.c ~ country + year + grow + log.infl.9 + log.gdppc, data = allNeib.newdata)
#   foo4 <- lm(dv.tax.c ~ country + year + grow + log.infl.9 + log.gdppc + log.unemp, data = allNeib.newdata)
#   foo5 <- lm(dv.tax.c ~ country + year + grow + log.infl.9 + log.gdppc + log.unemp + DP, data = allNeib.newdata)
#   foo6 <- lm(dv.tax.c ~ country + year + grow + 
#                log.infl.9 + log.gdppc + log.unemp + DP + log.tradeopen, data = allNeib.newdata)
#   foo7 <- lm(dv.tax.c ~ country + year + grow + 
#                log.infl.9 + log.gdppc + log.unemp + DP + log.tradeopen + capopen, data = allNeib.newdata)
#   foo8 <- lm(dv.tax.c ~ country + year + grow + 
#                log.infl.9 + log.gdppc + log.unemp + DP + log.tradeopen +
#                Neib.mdd, data = allNeib.newdata) # capopen drops <- Neib.mdd
#   foo9 <- lm(dv.tax.c ~ country + year + grow + 
#                log.infl.9 + log.gdppc + log.unemp + DP + log.tradeopen +
#                Neib.mdd + Neib.lng + Neib.igo +
#                Neib.dst + Neib.fdi + Neib.cap + Neib.seexp + capopen, data = allNeib.newdata) # capopen drops <- Neib.mdd
#   ## NA's created as a result of foo9 
#   #   <- NAs indicate Neib.portf is linearly related to Neib.fdi
#   summary(foo9)






#############################################################################
## Run Amelia with time settings (polytime = 0)
#############################################################################

# ##########
# ## allNeib.newdata
# ##########
#   
#   a.allNeib.newdata.time <- amelia(allNeib.newdata, m=5, ts="year", cs="country",  
#                       lags = c("year", "dv.tax.c"), 
#                       intercs = TRUE,
#                       incheck = FALSE,
#                       empri = .10*nrow(allNeib.newdata),
#                       polytime = 0,
#                       bound = rbind(c(3, 0, Inf), # dv.tax.c
#                                     c(7, 0, Inf), # DP
#                                     c(4, 0, Inf), # infl
#                                     c(6, 0, Inf)), # unemp
# #                                     c(colnum, lb, ub)        
#   )
#   a.allNeib.newdata.time
#   
#   names(allNeib.newdata)
#   allNeib.newdata.portf.out <- allNeib.newdata[,-18]
#   names(allNeib.newdata.portf.out)
#   
# 
#   hist(a.allNeib.newdata.time$imputations[[1]]$log.unemp)
#   hist(allNeib.newdata$log.unemp)
#   names(allNeib.newdata)
#   
#   usa.a.allNeib.newdata.time <- tscsPlot(a.allNeib.newdata.time, incheck=FALSE, cs = "USA", main = "USA (with time settings)", var = "Neib.fdi")
#   jpn.a.allNeib.newdata.time <- tscsPlot(a.allNeib.newdata.time, cs = "JPN", var = "dv.tax.c")
#   gmy.a.allNeib.newdata.time <- tscsPlot(a.allNeib.newdata.time, cs = "GMY", var = "dv.tax.c")
#   
#   par(mfrow=c(1,2))
#   overimp.newdata.time <- overimpute(a.allNeib.newdata.time, var = "dv.tax.c", main = "Overimpute (with time settings)")  
#     compare.density(a.newdata.time, var = "dv.tax.c")
#   






###########################################################
## allNeib.newdata.portf.out
###########################################################  

## Neib.lng, Neib.mdd, and capopen as "nominal"

a.lng.cap.mdd.noms <- amelia(allNeib.newdata.portf.out, m=5, ts="year", cs="country",  
                             lags = c("year", "dv.tax.c"), 
                             intercs = TRUE,
                             incheck = FALSE,
                             empri = .10*nrow(allNeib.newdata.portf.out),
                             noms = c("capopen","Neib.mdd", "Neib.lng"),
                             polytime = 0,
                             bound = rbind(c(3, 0, Inf), # dv.tax.c
                                           c(7, 0, Inf) # DP
                             ))
a.lng.cap.mdd.noms

View(a.allNeib.newdata.portf.out.time$imputations[[1]])


plot.new()
par(mfrow=c(3,3))
compare.density(a.lng.cap.noms, var = "dv.tax.c", legend = "")
compare.density(a.lng.cap.noms, var = "DP", legend = "")
compare.density(a.lng.cap.noms, var = "log.unemp", legend = "")
compare.density(a.lng.cap.noms, var = "grow", legend = "")
compare.density(a.lng.cap.noms, var = "log.tradeopen", legend = "")
compare.density(a.lng.cap.noms, var = "capopen", legend = "")
compare.density(a.lng.cap.noms, var = "log.infl.9", legend = "")
compare.density(a.lng.cap.noms, var = "log.gdppc", legend = "")
compare.density(a.lng.cap.noms, var = "Neib.mdd", legend = "")
compare.density(a.lng.cap.noms, var = "Neib.seexp", legend = "")
compare.density(a.lng.cap.noms, var = "Neib.td", legend = "")
compare.density(a.lng.cap.noms, var = "Neib.lng", legend = "")
compare.density(a.lng.cap.noms, var = "Neib.igo", legend = "")
compare.density(a.lng.cap.noms, var = "Neib.fdi", legend = "")
names(allNeib.newdata.portf.out)


save.image(file = "imputationsFinal.RData")  






#########################################################################################
## Setting "noms" argument takes an eternity for imputations
## May be a faulty assumption
#########################################################################################

## No nominal settings on capopen, Neib.mdd, or Neib.lng

a.alternative <- amelia(allNeib.newdata.portf.out,
                        m=5,
                        ts="year",
                        cs="country",
                        lags=c("year","dv.tax.c"),
                        #                         noms = c("capopen","Neib.mdd","Neib.lng"),
                        #                         intercs = TRUE,
                        incheck =FALSE,
                        empri=.10*nrow(allNeib.newdata.portf.out),
                        #                         polytime=0,
                        bound = rbind(c(3,0,Inf),
                                      c(7,0,Inf)))
a.alternative

Chain Lengths:
  --------------
  Imputation 1:  106
Imputation 2:  118
Imputation 3:  116
Imputation 4:  119
Imputation 5:  113


plot.new()
par(mfrow=c(3,3))
compare.density(a.alternative, var = "dv.tax.c", legend = "")
compare.density(a.alternative, var = "DP", legend = "")
compare.density(a.alternative, var = "log.unemp", legend = "")
compare.density(a.alternative, var = "grow", legend = "")
compare.density(a.alternative, var = "log.tradeopen", legend = "")
compare.density(a.alternative, var = "capopen", legend = "")
compare.density(a.alternative, var = "log.infl.9", legend = "")
compare.density(a.alternative, var = "log.gdppc", legend = "")
compare.density(a.alternative, var = "Neib.mdd", legend = "")

compare.density(a.alternative, var = "Neib.seexp", legend = "")
compare.density(a.alternative, var = "Neib.td", legend = "")
compare.density(a.alternative, var = "Neib.lng", legend = "")
compare.density(a.alternative, var = "Neib.igo", legend = "")
compare.density(a.alternative, var = "Neib.fdi", legend = "")

USA <- tscsPlot(a.alternative, var = "dv.tax.c", cs="USA",main="USA(with time settings)")
JPN <- tscsPlot(a.alternative, var = "dv.tax.c", cs="JPN",main="JPN(with time settings)")
GMY <- tscsPlot(a.alternative, var = "dv.tax.c", cs="GMY",main="GMY(with time settings)")
BEL<- tscsPlot(a.alternative, var = "dv.tax.c", cs="BEL",main="BEL(with time settings)")
ARG <- tscsPlot(a.alternative, var = "dv.tax.c", cs="ARG",main="ARG(with time settings)")

save(allNeib.newdata, allNeib.newdata.portf.out, 
     domvars, domvars.in,a.alternative,
     file = "alternativeImputations.RData")




###########################################################
## Post-imputation transformations
###########################################################
## un-log infl, unemp, gdppc, tradeopen



a.alternative.detrans1 <- transform(a.alternative, log.infl.9 = (exp(log.infl.9)-9)) 
head(a.alternative.detrans1$imputations[[1]]$log.infl.9)
head(a.alternative$imputations[[1]]$log.infl.9)
names(a.alternative.detrans1$imputations[[1]])
colnames(a.alternative.detrans1$imputations[[5]])[4] <- "infl"

a.alternative.detrans2 <- transform(a.alternative.detrans1, log.unemp = exp(log.unemp))
head(a.alternative.detrans2$imputations[[1]]$log.unemp)
head(a.alternative.detrans1$imputations[[1]]$log.unemp)
names(a.alternative.detrans2$imputations[[1]])
colnames(a.alternative.detrans2$imputations[[1]])[6] <- "unemp"
colnames(a.alternative.detrans2$imputations[[5]])[6] <- "unemp" 
colnames(a.alternative.detrans2$imputations[[4]])[6] <- "unemp"
colnames(a.alternative.detrans2$imputations[[3]])[6] <- "unemp"
colnames(a.alternative.detrans2$imputations[[2]])[6] <- "unemp"

a.alternative.detrans3 <- transform(a.alternative.detrans2, log.tradeopen = exp(log.tradeopen))
head(a.alternative.detrans2$imputations[[1]]$log.tradeopen)
head(a.alternative.detrans3$imputations[[1]]$log.tradeopen)
names(a.alternative.detrans3$imputations[[1]])
colnames(a.alternative.detrans3$imputations[[1]])[10] <- "tradeopen"
colnames(a.alternative.detrans3$imputations[[5]])[10] <- "tradeopen" 
colnames(a.alternative.detrans3$imputations[[4]])[10] <- "tradeopen"
colnames(a.alternative.detrans3$imputations[[3]])[10] <- "tradeopen"
colnames(a.alternative.detrans3$imputations[[2]])[10] <- "tradeopen"

a.alternative.detrans4 <- transform(a.alternative.detrans3, log.gdppc = exp(log.gdppc))
head(a.alternative.detrans3$imputations[[1]]$log.gdppc)
head(a.alternative.detrans4$imputations[[1]]$log.gdppc)
names(a.alternative.detrans4$imputations[[1]])
colnames(a.alternative.detrans4$imputations[[5]])[8] <- "gdppc"
colnames(a.alternative.detrans4$imputations[[4]])[8] <- "gdppc"
colnames(a.alternative.detrans4$imputations[[3]])[8] <- "gdppc"
colnames(a.alternative.detrans4$imputations[[2]])[8] <- "gdppc"
colnames(a.alternative.detrans4$imputations[[1]])[8] <- "gdppc"






############################################################################
## Modeling using imputed panel data (includes CapOpen, Tax Lagged)
############################################################################

## test-run on a non-network model:

## a.alternative
b.out <- NULL
se.out <- NULL
for(i in 1:a.alt.fin$m){
  fit.node.out <- lm(dv.tax.c ~ lag(dv.tax.c)
                     +infl
                     +grow
                     +unemp
                     +DP
                     +gdppc
                     +capopen
                     +tradeopen
                     +as.factor(country)
                     +as.factor(year),
                     data = a.alt.fin$imputations[[i]]
  )
  b.out <- rbind(b.out, fit.node.out$coef)
  se.out <- rbind(se.out, coef(summary(fit.node.out))[,2])
}
combined.results <- mi.meld(q = b.out, se = se.out)

install.packages("Zelig")
require(Zelig)


## non-network model
z.node.imp <- zelig(dv.tax.c ~ lag(dv.tax.c)
                    +infl
                    +grow
                    +unemp
                    +DP
                    +gdppc
                    +capopen
                    +tradeopen
                    +as.factor(country)
                    +as.factor(year),
                    data = a.alt.fin$imputations,
                    model = "ls"
)

summary(z.node.imp)


## distance
z.space.imp <- zelig(dv.tax.c ~ lag(dv.tax.c)
                     +infl
                     +grow
                     +unemp
                     +DP
                     +gdppc
                     +capopen
                     +tradeopen
                     +Neib.mdd
                     +as.factor(country)
                     +as.factor(year),
                     data = a.alt.fin$imputations,
                     model = "ls"
)
summary(z.space.imp)



## language only
z.onlylng.imp <- zelig(dv.tax.c ~ lag(dv.tax.c)
                       +infl
                       +grow
                       +unemp
                       +DP
                       +gdppc
                       +capopen
                       +tradeopen
                       #                    +Neib.mdd
                       +Neib.lng
                       +as.factor(country)
                       +as.factor(year),
                       data = a.alt.fin$imputations,
                       model = "ls"
)
summary(z.onlylng.imp)


## space & language
z.lng.imp <- zelig(dv.tax.c ~ lag(dv.tax.c)
                   +infl
                   +grow
                   +unemp
                   +DP
                   +gdppc
                   +capopen
                   +tradeopen
                   +Neib.mdd
                   +Neib.lng
                   +as.factor(country)
                   +as.factor(year),
                   data = a.alt.fin$imputations,
                   model = "ls"
)
summary(z.lng.imp)



## fdi
z.fdi.imp <- zelig(dv.tax.c ~ lag(dv.tax.c)
                   +as.factor(country)
                   +as.factor(year)
                   +infl
                   +grow
                   +unemp
                   +DP
                   +gdppc
                   +capopen
                   +tradeopen
                   +Neib.mdd
                   # +Neib.lng
                   # +Neib.portf
                   +Neib.fdi,
                   data = a.alt.fin$imputations,
                   model = "ls"
)
summary(z.fdi.imp)


## structural equivalence in trade
z.seexp.imp <- zelig(dv.tax.c ~ lag(dv.tax.c)
                     +as.factor(country)
                     +as.factor(year)
                     +infl
                     +grow
                     +unemp
                     +DP
                     +gdppc
                     +capopen
                     +tradeopen
                     +Neib.mdd
                     # +Neib.lng
                     # +Neib.fdi
                     +Neib.seexp,
                     data = a.alt.fin$imputations,
                     model = "ls"
)
summary(z.seexp.imp)



## trade connections (no directionality)
z.td.imp <- zelig(dv.tax.c ~ lag(dv.tax.c)
                  +as.factor(country)
                  +as.factor(year)
                  +infl
                  +grow
                  +unemp
                  +DP
                  +gdppc
                  +capopen
                  +tradeopen
                  +Neib.mdd
                  # +Neib.lng
                  # +Neib.fdi
                  +Neib.td,
                  data = a.alt.fin$imputations,
                  model = "ls"
)
summary(z.td.imp)



## igo
z.igo.imp <- zelig(dv.tax.c ~ lag(dv.tax.c)
                   +as.factor(country)
                   +as.factor(year)
                   +infl
                   +grow
                   +unemp
                   +DP
                   +gdppc
                   +capopen
                   +tradeopen
                   +Neib.mdd
                   # +Neib.lng
                   # +Neib.fdi
                   +Neib.igo,
                   data = a.alt.fin$imputations,
                   model = "ls"
)
summary(z.igo.imp)





########################################################################
## create tables
########################################################################

install.packages('stargazer')
library(stargazer)
install.packages("xtable")
library(xtable)
# 
# lm1 <- as.data.frame(summary(z.space.imp)$coef) 
# lm2 <- as.data.frame(summary(z.onlylng.imp)$coef) 
# lm3 <- as.data.frame(summary(z.lng.imp)$coef)
# 
# lm1[,2] <- ifelse(lm1[,4]<.001,paste(lm1[,2],"***",sep=" "), 
#                   ifelse(lm1[,4]<.01,paste(lm1[,2],"**",sep=" "), 
#                          ifelse(lm1[,4]<.05,paste(lm1[,2],"*",sep=" "), 
#                                 ifelse(lm1[,4]<.1,paste(lm1[,2],".",sep=" 
#                                                         "),lm1[,2])))) 
# 
# lm2[,2] <- ifelse(lm2[,4]<.001,paste(lm2[,2],"***",sep=" "), 
#                   ifelse(lm2[,4]<.01,paste(lm2[,2],"**",sep=" "), 
#                          ifelse(lm2[,4]<.05,paste(lm2[,2],"*",sep=" "), 
#                                 ifelse(lm2[,4]<.1,paste(lm2[,2],".",sep=" 
#                                                         "),lm2[,2])))) 
# 
# lm3[,2] <- ifelse(lm3[,4]<.001,paste(lm3[,2],"***",sep=" "), 
#                   ifelse(lm3[,4]<.01,paste(lm3[,2],"**",sep=" "), 
#                          ifelse(lm3[,4]<.05,paste(lm3[,2],"*",sep=" "), 
#                                 ifelse(lm3[,4]<.1,paste(lm3[,2],".",sep=" 
#                                                         "),lm3[,2])))) 
# 
# 
# ## ONE OPTIONS ## 
# lms <- as.data.frame(lm1[,1],lm2[,1],lm3[,1],lm1[,2],lm2[,2],lm3[,2]) 
# row.names(lms) <- row.names(lm3) 
# colnames(lms) <- c("Geography","Language","Geography & Language","Geography SE","Language SE", "Geography & Language SE") 
# xtable(lms)


m = 5
lm.imputed1 <- zelig(infl ~ trade + civlib, model="ls",data = imp1) 
lm.imputed2 <- zelig(infl ~ trade + civlib, model="ls",data = imp2) 


for(i in 1:m){ 
  print(stargazer(as.data.frame(summary(z.space.imp[[i]])$coef),as.data.frame(summary(z.onlylng.imp[[i]])$coef)))
} 





#and match
load('alternativeImputations.RData')
a.out=a.alternative
vars=c('cty','year','dv.tax.c', 'infl', 'grow', 'unemp', 'DP', 'gdppc', 'capopen', 
       'tradeopen', 'Neib.mdd','Neib.lng', 'Neib.fdi', 'Neib.seexp', 'Neib.td', 'Neib.igo',
       'Neib.igo.sm', 'Neib.igo.em')
regvars=c('cty','year','dv.tax.c', 'dv.tax.c.lg','infl', 'grow', 'unemp', 'DP', 'gdppc', 'capopen', 
       'tradeopen')


alldata=a.out$imputations[[1]]
colnames(alldata)=vars

mahco.out = matrix(NA,nrow=5,ncol=7)
mahse.out = matrix(NA,nrow=5,ncol=7)
ucemco.out = matrix(NA,nrow=5,ncol=7)
ucemse.out = matrix(NA,nrow=5,ncol=7)
#keeps=c('dv.tax.c.lg','infl','grow','unemp','DP','gdppc','capopen','tradeopen','Neib')


#Determine CEM cut points for manual CEM
hist(log(alldata$infl),breaks=50);median(alldata$infl)
hist(alldata$infl[alldata$infl<4],breaks=40);median(alldata$infl)
hist(alldata$grow,breaks=50)
hist(alldata$unemp,breaks=30)
hist(alldata$DP,breaks=20)
hist(alldata$gdppc[alldata$gdppc<100],breaks=40)
hist(alldata$capopen,breaks=30)
hist(alldata$tradeopen[alldata$tradeopen<100],breaks=60)

inflcut <- c(9,12)
growcut = c(0,5)
unempcut = c(10)
DPcut = c(30,35)
gdppccut=c(10)
capopencut = c(0)
tradeopencut = c(70)

my.cutpoints <- list(infl=inflcut,
                     grow=growcut, unemp=unempcut, DP=DPcut, 
                     gdppc=gdppccut, capopen=capopencut,
                     tradeopen=tradeopencut)

load('completePanel.RData')
#match and regress

i=1
q=1
treatbound=matrix(NA,nrow=5,ncol=7)
for (i in 1:length(a.out$imputations)) {
  alldata=a.out$imputations[[i]]
  colnames(alldata)=vars
  alldata=alldata[alldata$cty %in% rownames(Tax_corp), ]
  alldata$infl=exp(alldata$infl)-9
  alldata$unemp=exp(alldata$unemp)
  alldata$gdppc=exp(alldata$gdppc)
  alldata$tradeopen=exp(alldata$tradeopen)
  x <- slide(alldata, Var = "dv.tax.c", GroupVar = "cty", slideBy = -1)
  alldata$dv.tax.c.lg <- x$'dv.tax.c-1'
  alldata <- alldata[order(alldata$year),] 
  
  matchlist=list(geomatch,fdimatch,seexpmatch,tradematch,igomatch,igo.smmatch,igo.emmatch)
  Neiblist=list(alldata$Neib.mdd,alldata$Neib.fdi,alldata$Neib.seexp,alldata$Neib.td,
                alldata$Neib.igo,alldata$Neib.igo.sm,alldata$Neib.igo.em)
  
  for (q in 1:length(matchlist)) {
  treat=as.numeric(matchlist[[q]])
  #hist(treat,breaks=100);summary(na.omit(treat))
  treatbound=summary(na.omit(treat))[5]
    #mean(na.omit(treat));treatbound
  treat[which(treat<=treatbound)]=0
  treat[which(treat>treatbound)]=1
  data=alldata[regvars]
  data$treat=treat
  data$Neib=Neiblist[[q]]
  #data$Neib.mdd=Neib
  data=data[complete.cases(data),]

#Mahalanobis matching
mah.match <- matchit(formula = treat ~ infl+grow+unemp+DP+gdppc+capopen+tradeopen,
           data = data,method = "nearest",distance = "mahalanobis",discard="control")
mah.match
mah.matched = match.data(mah.match)
imbalance(group=mah.matched$treat, data=mah.matched,
  drop=c("treat", "dv.tax.c", "cty","year","distance", "weights","Neib","dv.tax.c.lg"))

mah.model <- lm(dv.tax.c ~ dv.tax.c.lg+as.factor(cty)+as.factor(year)+infl+grow+unemp
        +DP+gdppc+capopen+tradeopen+Neib,
        data = mah.matched)
summary(mah.model)
mahco.out[i,q] <- mah.model$coef['Neib']
mahse.out[i,q] <- coef(summary(mah.model))[,2]['Neib']

}
}
combined.mah = mi.meld(q=mahco.out, se = mahse.out)

1-pnorm(combined.mah$q[1]/combined.mah$se[1])+pnorm(-combined.mah$q[1]/combined.mah$se[1])
1-pnorm(combined.mah$q[2]/combined.mah$se[2])+pnorm(-combined.mah$q[2]/combined.mah$se[2])
1-pnorm(combined.mah$q[3]/combined.mah$se[3])+pnorm(-combined.mah$q[3]/combined.mah$se[3])
1-pnorm(combined.mah$q[4]/combined.mah$se[4])+pnorm(-combined.mah$q[4]/combined.mah$se[4])
1-pnorm(combined.mah$q[5]/combined.mah$se[5])+pnorm(-combined.mah$q[5]/combined.mah$se[5])
1-pnorm(combined.mah$q[6]/combined.mah$se[6])+pnorm(-combined.mah$q[6]/combined.mah$se[6])
1-pnorm(combined.mah$q[7]/combined.mah$se[7])+pnorm(-combined.mah$q[7]/combined.mah$se[7])


for (i in 1:length(a.out$imputations)) {
  alldata=a.out$imputations[[i]]
  colnames(alldata)=vars
  alldata=alldata[alldata$cty %in% rownames(Tax_corp), ]
  alldata$infl=exp(alldata$infl)-9
  alldata$unemp=exp(alldata$unemp)
  alldata$gdppc=exp(alldata$gdppc)
  alldata$tradeopen=exp(alldata$tradeopen)
  x <- slide(alldata, Var = "dv.tax.c", GroupVar = "cty", slideBy = -1)
  alldata$dv.tax.c.lg <- x$'dv.tax.c-1'
  alldata <- alldata[order(alldata$year),] 
  
  matchlist=list(geomatch,fdimatch,seexpmatch,tradematch,igomatch,igo.smmatch,igo.emmatch)
  Neiblist=list(alldata$Neib.mdd,alldata$Neib.fdi,alldata$Neib.seexp,alldata$Neib.td,
                alldata$Neib.igo,alldata$Neib.igo.sm,alldata$Neib.igo.em)
  
  for (q in 1:length(matchlist)) {
    treat=as.numeric(matchlist[[q]])
    hist(treat,breaks=100);summary(na.omit(treat))
    treatbound=summary(na.omit(treat))[3]
    #mean(na.omit(treat));treatbound
    treat[which(treat<=treatbound)]=0
    treat[which(treat>treatbound)]=1
    data=alldata[regvars]
    data$treat=treat
    data$Neib=Neiblist[[q]]
    #data$Neib.mdd=Neib
    data=data[complete.cases(data),]
    
    #User CEM
    user.cem.match = matchit(formula = treat ~ infl+grow+unemp+DP+gdppc+capopen+tradeopen,
                             data=data,method='cem', cutpoints = my.cutpoints)
    user.cem.match
    user.cem.data = match.data(user.cem.match)
    user.imb <- imbalance(group=user.cem.data$treat,
                          data=user.cem.data,
                          drop=c("treat","dv.tax.c","distance","weights","cty","year","subclass",
                                 "dv.tax.c.lg",'Neib'))
    user.imb
    ucem.model = lm(dv.tax.c ~ as.factor(cty)+as.factor(year)+dv.tax.c.lg+infl+grow+unemp+DP+gdppc
                    +capopen+tradeopen+Neib,data = user.cem.data)
    summary(ucem.model)
    
    ucemco.out[i,q] <- ucem.model$coef['Neib']
    ucemse.out[i,q] <- coef(summary(ucem.model))[,2]['Neib']
  }
}

combined.ucem = mi.meld(q = ucemco.out, se = ucemse.out)

combined.mah
combined.ucem

1-pnorm(combined.ucem$q[1]/combined.ucem$se[1])+pnorm(-combined.ucem$q[1]/combined.ucem$se[1])
1-pnorm(combined.ucem$q[2]/combined.ucem$se[2])+pnorm(-combined.ucem$q[2]/combined.ucem$se[2])
1-pnorm(combined.ucem$q[3]/combined.ucem$se[3])+pnorm(-combined.ucem$q[3]/combined.ucem$se[3])
1-pnorm(combined.ucem$q[4]/combined.ucem$se[4])+pnorm(-combined.ucem$q[4]/combined.ucem$se[4])
1-pnorm(combined.ucem$q[5]/combined.ucem$se[5])+pnorm(-combined.ucem$q[5]/combined.ucem$se[5])
1-pnorm(combined.ucem$q[6]/combined.ucem$se[6])+pnorm(-combined.ucem$q[6]/combined.ucem$se[6])
1-pnorm(combined.ucem$q[7]/combined.ucem$se[7])+pnorm(-combined.ucem$q[7]/combined.ucem$se[7])


save(combined.mah,file='malahanobis')
save(combined.ucem,file='ucem')



#Auto CEM
#auto.cem.match = matchit(formula = treat ~ infl
#                         +grow
#                         +unemp
#                         +DP
#                         +gdppc
#                         +capopen
#                         +tradeopen,
#                         data=data,method='cem')
#auto.cem.data = match.data(auto.cem.match)
#auto.cem.match
#auto.imb <- imbalance(group=auto.cem.data$treat,
#                      data=auto.cem.data,
#                      drop=c("treat","dv.tax.c","distance",
#                             "weights","cty","year",'subclass','dv.tax.c.lg','Neib'))
#auto.imb
#summary(auto.cem.match)
#acem.model = lm(dv.tax.c ~ 
#                  dv.tax.c.lg
#                +infl
#                +grow
#                +unemp
#                +DP
#                +gdppc
#                +capopen
#                +tradeopen
#                +Neib
#                +as.factor(cty)
#                +as.factor(year),
#                data = user.cem.data)
#summary(ucem.model)
#acemco.out[q+5*(i-1),] <- acem.model$coef[keeps]
#acemse.out[q+5*(i-1),] <- coef(summary(acem.model))[,2][keeps]